Chapter 5 Bacteria
genome_metadata %>%
filter(domain == "d__Bacteria") %>%
group_by(phylum) %>%
summarise(mag_n=n()) %>%
arrange(-mag_n) %>%
tt()| phylum | mag_n |
|---|---|
| p__Bacteroidota | 369 |
| p__Bacillota_A | 251 |
| p__Pseudomonadota | 157 |
| p__Bacillota | 54 |
| p__Desulfobacterota | 37 |
| p__Verrucomicrobiota | 29 |
| p__Patescibacteria | 27 |
| p__Cyanobacteriota | 23 |
| p__Fusobacteriota | 12 |
| p__Chloroflexota | 10 |
| p__Bacillota_C | 9 |
| p__Deferribacterota | 8 |
| p__Acidobacteriota | 6 |
| p__Bacillota_B | 5 |
| p__Desulfobacterota_F | 5 |
| p__Actinomycetota | 4 |
| p__Planctomycetota | 4 |
| p__Spirochaetota | 4 |
| p__Campylobacterota | 3 |
| p__Myxococcota | 2 |
| p__ | 1 |
| p__Chlamydiota | 1 |
| p__Desulfobacterota_G | 1 |
| p__Fibrobacterota | 1 |
| p__J088 | 1 |
| p__JAKLEM01 | 1 |
| p__Omnitrophota | 1 |
5.1 Genome quality
genome_metadata %>%
summarise(completeness_mean=mean(completeness) %>% round(2) %>% as.character(),
completeness_sd=sd(completeness) %>% round(2) %>% as.character(),
contamination_mean=mean(contamination) %>% round(2),
contamination_sd=sd(contamination) %>% round(2)) %>%
unite("Completeness",completeness_mean, completeness_sd, sep = " ± ", remove = TRUE) %>%
unite("Contamination",contamination_mean, contamination_sd, sep = " ± ", remove = TRUE) %>%
tt()| Completeness | Contamination |
|---|---|
| 82.78 ± 17.27 | 1.9 ± 2.36 |
#Generate quality biplot
genome_biplot <- genome_metadata %>%
select(c(genome,domain,phylum,completeness,contamination,length)) %>%
arrange(match(genome, rev(genome_tree$tip.label))) %>% #sort MAGs according to phylogenetic tree
ggplot(aes(x=completeness,y=contamination,size=length,color=phylum)) +
geom_point(alpha=0.7) +
ylim(c(10,0)) +
scale_color_manual(values=phylum_colors) +
labs(y= "Contamination", x = "Completeness") +
theme_classic() +
theme(legend.position = "none")
#Generate contamination boxplot
genome_contamination <- genome_metadata %>%
ggplot(aes(y=contamination)) +
ylim(c(10,0)) +
geom_boxplot(colour = "#999999", fill="#cccccc") +
theme_void() +
theme(legend.position = "none",
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
plot.margin = unit(c(0, 0, 0.40, 0),"inches")) #add bottom-margin (top, right, bottom, left)
#Generate completeness boxplot
genome_completeness <- genome_metadata %>%
ggplot(aes(x=completeness)) +
xlim(c(50,100)) +
geom_boxplot(colour = "#999999", fill="#cccccc") +
theme_void() +
theme(legend.position = "none",
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
plot.margin = unit(c(0, 0, 0, 0.50),"inches")) #add left-margin (top, right, bottom, left)
#Render composite figure
grid.arrange(grobs = list(genome_completeness,genome_biplot,genome_contamination),
layout_matrix = rbind(c(1,1,1,1,1,1,1,1,1,1,1,4),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3),
c(2,2,2,2,2,2,2,2,2,2,2,3)))5.2 Functional overview
# Aggregate basal GIFT into elements
function_table <- genome_gifts %>%
to.elements(., GIFT_db)
# Generate basal tree
function_tree <- force.ultrametric(genome_tree, method="extend") %>%
ggtree(., size = 0.3) ***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
#Add phylum colors next to the tree tips
function_tree <- gheatmap(function_tree, phylum_heatmap, offset=0, width=0.1, colnames=FALSE) +
scale_fill_manual(values=phylum_colors) +
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
function_tree <- function_tree + new_scale_fill()
#Add functions heatmap
function_tree <- gheatmap(function_tree, function_table, offset=0.5, width=3.5, colnames=FALSE) +
vexpand(.08) +
coord_cartesian(clip = "off") +
scale_fill_gradient(low = "#f4f4f4", high = "steelblue", na.value="white") +
labs(fill="GIFT")
#Reset fill scale to use a different colour profile in the heatmap
function_tree <- function_tree + new_scale_fill()
# Add completeness barplots
function_tree <- function_tree +
geom_fruit(data=genome_metadata,
geom=geom_bar,
grid.params=list(axis="x", text.size=2, nbreak = 1),
axis.params=list(vline=TRUE),
mapping = aes(x=length, y=genome, fill=completeness),
offset = 3.8,
orientation="y",
stat="identity") +
scale_fill_gradient(low = "#cf8888", high = "#a2cc87") +
labs(fill="Genome\ncompleteness")
function_tree5.3 Functional ordination
# Generate the tSNE ordination
tSNE_function <- Rtsne(X=function_table, dims = 2, check_duplicates = FALSE)
# Plot the ordination
function_ordination <- tSNE_function$Y %>%
as.data.frame() %>%
mutate(genome=rownames(function_table)) %>%
inner_join(genome_metadata, by="genome") %>%
rename(tSNE1="V1", tSNE2="V2") %>%
select(genome,phylum,tSNE1,tSNE2, length) %>%
ggplot(aes(x = tSNE1, y = tSNE2, color = phylum, size=length))+
geom_point(shape=16, alpha=0.7) +
scale_color_manual(values=phylum_colors) +
theme_minimal() +
labs(color="Phylum", size="Genome size") +
guides(color = guide_legend(override.aes = list(size = 5))) # enlarge Phylum dots in legend
function_ordination